**********************************
**# A. Occupation Classifications 
**********************************	
* Identify production occupations: build the list of occ1990 codes whose
* occGroup is 4, 5 or 6 and save it as "processing/occ_productioncat".
use "analysis/occ1990_occ1990dd_GLL.dta", clear
	* _m is the merge indicator stored in the concordance file:
	* m=1: 10 occ1990 codes that map to census1990 codes for which census1990 codes not in Dorn concordance
	* m=2: 1 obs only in dorn concordance. Can't do anything with this "new" occ
keep if _m != 2
	* Diagnostics only: min/max merge status within each occ1990 code.
	* Both helpers are discarded by the -keep- further down.
bysort occ1990: egen m_min = min(_m)
bysort occ1990: egen m_max = max(_m)
order _m m_m*
	* 8 of the 10 m=1 occ1990 codes map to multiple census1990 codes and that census1990 code is in Dorn concordance
	* The other 2 codes are 905="Military" and 999="N/A and unknown"
keep if occGroup != .
keep occ1990 occGroup occupationD
duplicates drop
	* occGroup coding:
	* 1: Managers, professionals, technology, finance, public safety
	* 2: Clerical, retail sales
	* 3: Low skill services
	* 4: Production, craft
	* 5: Machine operators, assemblers
	* 6: Transport, construction, mechanical, mining, farm
	* Groups 4-6 are the ones treated as "production" here.
keep if inrange(occGroup, 4, 6)
drop occGroup
	* This file is later merged m:1 on occ1990, so occ1990 must be unique in it.
save "processing/occ_productioncat", replace


* Concordance weights: each occ1990 code is split equally across the
* occ1990dd codes it maps to (weight = 1 / number of pairs for that occ1990).
use "analysis/occ1990_occ1990dd_GLL.dta", clear
	* m=2: single dd occ (occdd == 874 that in dd concordance but not in ipums data)
keep if _m != 2
	* occ1990dd is missing exactly for the m=1 rows (checked in both directions)
assert (_m==1) == (occ1990dd==.)
keep if occ1990dd != .
	* Don't want to split the 8 occ1990 codes above into multiple occ1990-occ1990dd pairs and assign them weights because
	* occ1990dd is missing for one of the pairs. The weight should just be applied to the non-missing occ1990-occ1990dd 
	* pair. To do otherwise is to risk later assigning the non-missing and missing pairs to different routine v
	* non-routine or production v non-production classifications
keep occ1990 occ1990dd
	* Number of occ1990dd rows per occ1990, then its reciprocal
egen n_pairs = total(1), by(occ1990)
gen weight = 1/n_pairs
drop n_pairs
label var weight "Weight"
save "analysis/occ1990_occ1990dd_weights.dta", replace


*Identify Routine occs
* An occ1990dd code is "routine" when its RTI is strictly above the median RTI.
use "raw/occ1990dd_alm.dta", clear
qui sum RTI, d
	* Compare against r(p50) directly rather than a macro-expanded copy, so the
	* median is used at full precision.
	* Stata treats missing as larger than any number, so without the explicit
	* !missing() guard any occupation with missing RTI would be kept as routine.
keep if RTI > r(p50) & !missing(RTI)
keep occ1990dd
save "processing/routine_occs", replace


*************************************
**# B. Employment (Total and by Sex) 
*************************************
*Looping over total, routine occupations only, non-routine occupations only
* For each sample the loop:
*   1. loads the IPUMS extract and restricts to employed persons aged 25-65,
*   2. (rout 2/3) splits persons across occ1990dd codes with the concordance
*      weights and keeps routine or non-routine occupations,
*   3. flags production occupations, builds emp/pay totals by ind1990-year,
*   4. computes shares, log differences and 10-year forward/lagged changes,
*   5. saves the 1980 row of changes to "analysis/ind1990_changes_rout`rout'".
foreach rout in 1 2 3 {
	* rout = 1: all
	* rout = 2: routine
	* rout = 3: non-routine

use  "analysis/ipums_1970_1990", clear
keep if empstat == 1 & age >= 25 & age <= 65 & occ1990!=905
assert occ1990!=999
	* No obs with occ1990 = "N/A and unknown"

* The routine and non-routine samples share every step except which side of
* the merge with the routine-occupation list is kept.
if inlist(`rout', 2, 3) {
	* routinereg is not referenced anywhere in the visible code; kept in case
	* later code reads it.
	if `rout' == 2 {
		local routinereg "routine"
	}
	if `rout' == 3 {
		local routinereg "non_routine"
	}

	* One row per person x occ1990dd code, person weight scaled by the
	* concordance weight
	joinby occ1990 using "analysis/occ1990_occ1990dd_weights.dta", unmatched(both)
	assert _merge == 3 if occ1990!=905
	keep if _merge==3
	drop _merge
	replace perwt = perwt*weight

	merge m:1 occ1990dd using "processing/routine_occs"
	assert _merge!=2
	if `rout' == 2 {
		* Matched to the routine list: these are the routine occs
		keep if _merge == 3
	}
	if `rout' == 3 {
		* Not in the routine list: these are the non-routine occs
		keep if _merge == 1
	}
	drop _merge
}

merge m:1 occ1990 using "processing/occ_productioncat"
drop if _merge == 2
	* Log-only check on eight specific occ1990 codes; x is discarded by -collapse-
gen x=inlist(occ1990,389,405,804,675,628,349,436,779)
tab occ1990 if x==1
tab _merge if x==1
tab _merge occ1990 if x==1, nol

	* Production = occ1990 found in the production-occupation list
gen prod = (_merge == 3)
drop _merge

*We want to calculate: 
	*the change in the share of production pay (emp) accounted for by women
	*the change in the share of non-production pay (emp) accounted for by women
	*the change in the share of female pay accounted for by production
	*the change in the share of male pay accounted for by production

	*the change in the log difference of pay (emp) to non-production relative to production
	*the change in the log difference of pay (emp) to non-production relative to production for men
	*the change in the log difference of pay (emp) to non-production relative to production for women

gen women = (sex == 2)
gen men = (sex == 1)

gen nonprod = 1-prod

gen women_nonprod = women*nonprod
gen women_prod = women*prod

gen men_nonprod = men*nonprod
gen men_prod = men*prod

	* NOTE(review): a missing educ would count as coll==1 (missing >= 10 is true
	* in Stata) - confirm educ has no missing values in this extract.
gen coll = educ >= 10
gen noncoll = educ < 10

gen women_noncoll = women*noncoll
gen women_coll = women*coll

gen men_noncoll = men*noncoll
gen men_coll = men*coll

gen tot_coll = coll
gen tot_noncoll = noncoll

gen tot = 1

	* Weighted employment and weighted wage bill for every group indicator
foreach x in men women prod nonprod tot women_nonprod women_prod men_nonprod men_prod ///
women_noncoll women_coll men_noncoll men_coll tot_coll tot_noncoll {
	gen `x'_emp = `x'*perwt
	gen `x'_pay = `x'*perwt*incwage
}

collapse (sum) *_emp *_pay, by(ind1990 year)

xtset ind1990 year

foreach out in emp pay {
	
	* Levels
	gen w_prod_tot_`out'_share = women_prod_`out'/prod_`out'
	gen w_nonprod_tot_`out'_share = women_nonprod_`out'/nonprod_`out'
	gen w_prod_women_`out'_share = women_prod_`out'/women_`out'
	gen m_prod_men_`out'_share = men_prod_`out'/men_`out'
	
	gen m_ln_skill_diff_`out' = ln(men_nonprod_`out') - ln(men_prod_`out')
	gen w_ln_skill_diff_`out' = ln(women_nonprod_`out') - ln(women_prod_`out')
	gen ln_skill_diff_`out' = ln(nonprod_`out') - ln(prod_`out')

	gen m_ln_coll_diff_`out' = ln(men_coll_`out') - ln(men_noncoll_`out')
	gen w_ln_coll_diff_`out' = ln(women_coll_`out') - ln(women_noncoll_`out')
	gen ln_coll_diff_`out' = ln(tot_coll_`out') - ln(tot_noncoll_`out')

	* Forward and lagged changes
	loc ys "w_prod_tot w_nonprod_tot w_prod_women m_prod_men"
	foreach y in `ys' {
	gen   `y'_`out'_share_ch = f10.`y'_`out'_share -     `y'_`out'_share
	gen l_`y'_`out'_share_ch =     `y'_`out'_share - l10.`y'_`out'_share
	}
	
	loc ys "m_ln_skill_diff w_ln_skill_diff ln_skill_diff"
	loc ys "`ys' m_ln_coll_diff w_ln_coll_diff ln_coll_diff"
	foreach y in `ys' {
	gen   `y'_`out'_ch = f10.`y'_`out' -     `y'_`out'
	gen l_`y'_`out'_ch =     `y'_`out' - l10.`y'_`out'
	}
}

* The 1980 female employment share by industry is only saved from the full sample
if `rout' == 1 {

	gen women_ind_share = women_emp/tot_emp

	preserve 
	keep if year == 1980
	keep women_ind_share ind1990
	save "processing/ind_women_share_1980", replace
	restore
	
}

* How much emp do the four manufacturing inds that are NOT observed in 1970 account for?
* Diagnostic only: everything between preserve and restore is discarded.
preserve
	bys year: egen empT=total(tot_emp)
	drop if year==1990
	gen y80_only = inlist(ind1990,210,232,362,390)
		* Built-in egen instead of the user-written -ereplace-
	gen y80_only_emp = y80_only*tot_emp
	bys year: egen y80_only_tot = total(y80_only_emp)
	gen sh=y80_only_tot/empT
		* These 4 inds account for 0.48% of 1980 US emp 
		* 359,720 total emp: 49.9% Guided missiles & space vehicles 362; 27.4% Toys/amusement/sporting 390; 11.8% each for 
		* Tires & inner tubes, Wood Buildings & Mobile homes
		* But 362 Guided missiles will drop later because have no export tariff for it. So, really dropping 0.24% of 1980 US 
		* emp if drop these the three inds 210, 232, 390
		* Makes sense to drop these because using 1970 SIC ind characteristics as controls and these industries didn't exist 
		* in 1970 IPUMS emp data
restore

keep year ind1990 *_ch
drop if year == 1990

egen n = total(1), by(ind1990)
	* n=1 -> 26 inds NOT observed in 1970
		* 4 manuf census inds
		* census 210 tires & inner tubes			    == sic 301
		* census 232 wood buildings & mobile homes	    == sic 245 
		* census 362 guided missiles & space vehicles 	== sic 376 
		* census 390 toys/amusement/sporting 		    == sic 394
	* n=2 -> all other 202 unique inds observed in both 1970 and 1980 
drop n
drop if year==1970

unique ind1990
	* 228 inds

keep year ind1990 ln_coll_diff_pay_ch m_ln_coll_diff_pay_ch w_ln_coll_diff_pay_ch *skill_diff_pay_ch

* Sample suffix for the variable labels. Reset on every pass so the full-sample
* labels never pick up a value of r left over from earlier code.
loc r ""
if `rout'==2 loc r "routine"
if `rout'==3 loc r "non-routine" 
 
label var ln_skill_diff_pay_ch "\$ \Delta \ln(\frac{Pay_i^{Non-Prod}}{Pay_i^{Prod}}) \$ 79-87 `r'"
label var m_ln_skill_diff_pay_ch "\$ \Delta \ln(\frac{Pay_i^{Non-Prod}}{Pay_i^{Prod}}) \$ 79-87 `r' males"
label var w_ln_skill_diff_pay_ch "\$ \Delta \ln(\frac{Pay_i^{Non-Prod}}{Pay_i^{Prod}}) \$ 79-87 `r' women"
label var l_ln_skill_diff_pay_ch "\$ \Delta \ln(\frac{Pay_i^{Non-Prod}}{Pay_i^{Prod}}) \$ 72-79 `r'"
label var l_m_ln_skill_diff_pay_ch "\$ \Delta \ln(\frac{Pay_i^{Non-Prod}}{Pay_i^{Prod}}) \$ 72-79 `r' males"
label var l_w_ln_skill_diff_pay_ch "\$ \Delta \ln(\frac{Pay_i^{Non-Prod}}{Pay_i^{Prod}}) \$ 72-79 `r' women"
label var ln_coll_diff_pay_ch "\$ \Delta \ln(\frac{Pay_i^{Coll}}{Pay_i^{Non-Coll}}) \$ 79-87 `r'"
label var m_ln_coll_diff_pay_ch "\$ \Delta \ln(\frac{Pay_i^{Coll}}{Pay_i^{Non-Coll}}) \$ 79-87 `r' males"
label var w_ln_coll_diff_pay_ch "\$ \Delta \ln(\frac{Pay_i^{Coll}}{Pay_i^{Non-Coll}}) \$ 79-87 `r' women"

save "analysis/ind1990_changes_rout`rout'", replace


}	

***************************
**# C. Non-employment Data
***************************
* SIC data
* Load the SIC-level dataset and attach MFA shares, foreign (export) tariff
* changes and price growth, all keyed on sic.
use "processing/SIC87_Final_Dataset.dta", clear
gen sic3=int(sic/10)
	* Log-only check: how many of the four 3-digit SICs of interest are present
unique sic3 if inlist(sic3,245,301,376,394)

* MFA
merge m:1 sic using "processing/mfa_sic_shares"
	* m=2: 65 unique sic codes with mfa data but not in our sic dataset
	* Every sic in our dataset must find MFA data
assert _merge != 1
keep if _merge == 3
drop _merge
	* NOTE(review): a missing mfa_share would give mfa==1 (missing > 0 is true
	* in Stata) - confirm mfa_share is never missing in the using file.
gen  mfa = mfa_share>0

* Foreign tariffs
merge m:1 sic using "processing/other_tariff_changes"
	* m=1: 4 unique sic inds in our export tariff dataset for which we don't have sic IO data
	* m=2: 48 sic inds in our sic IO data that not in export tariff data
	* NOTE(review): in Stata _merge==1 is master-only (here the SIC dataset) and
	* _merge==2 is using-only (the tariff file); the two notes above read as if
	* the roles were swapped - confirm which counts are which.
	* Only using-only rows are removed; unmatched master rows stay.
keep if _merge != 2
drop _merge

* Price growth
merge m:1 sic using "processing/sic_price_growth"
	* Matched observations only
drop if _merge != 3
drop _merge


* Other vars
* Lagged changes plus time-invariant, sic-level versions of a few variables.
xtset sic year
gen lag_ave_ols_ch    = ave_ols - l7.ave_ols
gen lag_invest_change = ln_invest_78 - ln_invest_70
gen lag_invest_def_ch = ln_invest_def_78 - ln_invest_def_70

	* Missing sts is treated as zero
replace sts = 0 if sts==.

	* 1979 level of exp(ln_exp), spread to every year of the sic
egen exp79 = max(exp(ln_exp)*(year==1979)), by(sic)

	* Overwrite sts with its 1972 value, spread to every year of the sic
egen sts_1972 = max(sts*(year==1972)), by(sic)
drop sts
rename sts_1972 sts

	* NOTE(review): sts was overwritten just above with its 1972 value, so
	* sts79 equals that 1972 value whenever the sic has a 1979 row; it is not a
	* separate 1979 reading of the original sts. Confirm this is intended.
egen sts79 = max(sts*(year==1979)), by(sic)

	* 1979 value of ave_ols/(1+ave_ols), spread to every year of the sic
gen ave79 = ave_ols*(year==1979)
egen ave_m79 = max((ave79)/(1+ave79)), by(sic)
gen rho79 = sts79*ave_m79


* Merge to IND 1990 
* Map 4-digit SIC codes to census90 industries and aggregate to census90-year:
* means for the industry characteristics, sums for tariffs and imports.
rename sic sic4
tostring sic4, replace
assert length(sic4)==4
joinby sic4 using "raw/sic4_2_census90_v2.dta", unmatched(both)
	* Every sic4 in our data must appear in the concordance; concordance-only
	* rows are discarded
assert _merge!=1
drop if _merge == 2 
assert census != ""
drop _merge

	* rho79 is named in full: the collapse previously listed "rho", which only
	* worked through variable-name abbreviation (breaks under -set varabbrev off-
	* and would silently switch variables if a "rho" were ever added upstream).
collapse ln_invest_78 ln_invest_def_78 lag_invest_def_ch mat_ship_def_78 ln_cap_lab_78 other_tariff_change mfa sts lag_invest_change ///
skill_emp_share_78 ave_iv_upstream_impwt ave_ols_upstream_impwt dln_pstar_vw* rho79 lag_ave_ols_ch ///
(sum) tariffs imports tariffs_c2 imports_c2 , by(census90 year)


*Changes in IV and OLS
* Tariff rates are rebuilt from the summed tariffs and imports; the changes are
* 8-year forward differences taken at 1979.
gen ave_iv_swiss = ln(1 + tariffs_c2/imports_c2)
gen ave_ols      = ln(1 + tariffs/imports)

	* census90 has to be numeric to serve as the panel id
destring census90, replace
xtset census90 year
gen ave_iv_ch  = f8.ave_iv_swiss - ave_iv_swiss if year == 1979
gen ave_ols_ch = f8.ave_ols - ave_ols           if year == 1979

gen ave_iv_up_ch  = f8.ave_iv_upstream_impwt - ave_iv_upstream_impwt   if year == 1979
gen ave_ols_up_ch = f8.ave_ols_upstream_impwt - ave_ols_upstream_impwt if year == 1979

	* Keep the single 1979 cross-section and relabel it 1980
keep if year == 1979 
replace year = 1980
rename census90 ind1990

* Women's emp share
* Attach industry-level covariates keyed on ind1990: 1980 female employment
* share, routineness, and automation.
merge m:1 ind1990 using "processing/ind_women_share_1980"
assert _merge!=1
	* m=2: same 151 census ind1990 codes in ipums emp data that not in our sic dataset
keep if _merge == 3
drop _merge

* Routineness
merge 1:1 ind1990 using "processing/routine_ind1990"
assert _merge!=1
	* m=2: 151 census ind1990 codes in ipums emp data that not in our sic dataset
keep if _merge == 3
drop _merge

rename RTIa rti

* Automation
	* keep(1 3) retains industries without automation data; they carry a
	* missing automation78
merge m:1 ind1990 using "processing/automation_ind1990_1947_1978", nogen keep(1 3)
rename automation automation78
	* Indicator for automation78 above .173. Stata treats missing as larger than
	* any number, so without the guard industries with no automation data would
	* be coded as automation==1; they are left missing instead.
gen automation = automation78>.173 if !missing(automation78)

	* mfa is an industry mean after the collapse; turn it back into an indicator
replace mfa = mfa>0

**# D. Clean up
* Attach LaTeX variable labels, keep the analysis variables and save the
* industry-level dataset.
label var ind1990 "ind1990 variable in IPUMS"
	* Label typo fixed: "Captial" -> "Capital"
label var ln_cap_lab_78 "\$ \frac{Capital_{i}}{Labor_{i}} \$"
label var women_ind_share "\$ \frac{ Emp^{Women}_{i}}{Emp_{i}} \$"
label var ave_iv_ch "\$ \Delta \ln\left({1+AVE^{IV}_{i}}\right)  \$"
label var ave_ols_ch "\$ \Delta \ln\left({1+AVE_{i}}\right)  \$"
label var other_tariff_change "\$ \Delta {AVE^{Exports}_{i}}  \$"
label var lag_ave_ols_ch  "\$\Delta AVE_{i,t-1}\$"
label var dln_pstar_vw_7279_alt  "\$\Delta \ln(p^*_{i,t-1})\$"
label var rho79 		  "\$ STS_{i}*\frac{AVE_{i}}{1+AVE_{i}}\$" 
label var lag_invest_def_ch "\$ \Delta \ln(Investment)_{i,t-1}    \$"
label var ln_invest_def_78 "\$ \ln(Investment_{i})  \$"

label var lag_invest_change "\$ \Delta \ln(Investment)_{i,t-1}    \$"
label var skill_emp_share_78 "\$ \frac{Emp^{Non-Prod}_{i}}{Emp._{i}} \$"
label var rti "\$Routineness_{i}\$"
label var ln_invest_78 "\$ \ln(Investment_{i})  \$"

label var  mat_ship_def_78 "\$\frac{Materials_i}{Shipments_i}\$"
	
label var mfa "\$ I(MFA_i) \$ "

label var automation "\$ Automation_i \$"
label var automation78 "\$ Automation_i \$"

	* rho79 is spelled out; the list previously held "rho", which only resolved
	* through variable-name abbreviation.
loc tariff_controls dln_pstar_vw_7279_alt mfa rho79 other_tariff_change
loc prod_controls 	ln_cap_lab_78 skill_emp_share_78 women_ind_share mat_ship_def_78
loc rti_controls 	ln_invest_def_78 lag_invest_def_ch rti automation78 
loc otherVars		automation ave_ols_ch  ave_iv_ch

	* NOTE(review): tariffVars and tradeVars are not defined anywhere in the
	* visible code; if they are not set earlier in the file they expand to
	* nothing here - confirm and drop them if so.
keep ind1990 year `tariff_controls' `prod_controls' `rti_controls' `tariffVars' `tradeVars' `otherVars'

save "analysis/ipums_industry_data.dta", replace